{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "### 6 Pandas的函数应用\n",
    "###### 1. 可直接使用 NumPy 的函数"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Numpy ufunc 函数，randn跟的是维数\n",
    "df = pd.DataFrame(np.random.randn(5,4) - 1)\n",
    "print(df)\n",
    "print(np.abs(df)) #绝对值"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T05:31:13.356595Z",
     "start_time": "2025-01-17T05:31:13.095429Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0 -1.041401 -0.038017 -1.893878 -2.289070\n",
      "1 -0.986316 -1.799427 -0.473184  0.396661\n",
      "2 -1.522086  0.468200 -0.618345 -1.098038\n",
      "3  0.161043 -0.554499 -0.307393 -1.857956\n",
      "4 -1.236249 -0.408393  0.677238 -1.970879\n",
      "          0         1         2         3\n",
      "0  1.041401  0.038017  1.893878  2.289070\n",
      "1  0.986316  1.799427  0.473184  0.396661\n",
      "2  1.522086  0.468200  0.618345  1.098038\n",
      "3  0.161043  0.554499  0.307393  1.857956\n",
      "4  1.236249  0.408393  0.677238  1.970879\n"
     ]
    }
   ],
   "execution_count": 1
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "###### 2. 通过 apply 将函数应用到列或行上"
  },
  {
   "cell_type": "code",
   "source": [
    "#apply默认作用在列上,x是每一列,因为axis=0\n",
    "print(df.apply(lambda x : x.max()))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T05:32:33.328952Z",
     "start_time": "2025-01-17T05:32:33.325287Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.161043\n",
      "1    0.468200\n",
      "2    0.677238\n",
      "3    0.396661\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "source": [
    "# apply作用在行上\n",
    "print(df.apply(lambda x : x.max(), axis=1))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T05:33:54.205249Z",
     "start_time": "2025-01-17T05:33:54.201391Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0   -0.038017\n",
      "1    0.396661\n",
      "2    0.468200\n",
      "3    0.161043\n",
      "4    0.677238\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 4
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "###### 3. 通过 map 将函数应用到每个数据上"
  },
  {
   "cell_type": "code",
   "source": [
    "# 使用map应用到每个数据\n",
    "print(df.map(lambda x : '%.2f' % x))\n",
    "df.dtypes"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T05:35:01.689420Z",
     "start_time": "2025-01-17T05:35:01.683420Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2      3\n",
      "0  -1.04  -0.04  -1.89  -2.29\n",
      "1  -0.99  -1.80  -0.47   0.40\n",
      "2  -1.52   0.47  -0.62  -1.10\n",
      "3   0.16  -0.55  -0.31  -1.86\n",
      "4  -1.24  -0.41   0.68  -1.97\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0    float64\n",
       "1    float64\n",
       "2    float64\n",
       "3    float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 5
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "data": {
      "text/plain": "str"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type('%.2f' % 1.3456)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T03:27:27.258699100Z",
     "start_time": "2024-04-11T03:27:27.224712Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": "###### 4 索引排序（不重要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Series\n",
    "print(np.random.randint(5, size=5))\n",
    "s4 = pd.Series(range(10, 15), index = np.random.randint(5, size=5)) # 索引随机生成\n",
    "print(s4)\n",
    "print('-'*50)\n",
    "\n",
    "# 索引排序,sort_index返回一个新的排好索引的series\n",
    "print(s4.sort_index())\n",
    "print(s4)\n",
    "# s4.loc[0:3]  loc索引值不唯一时直接报错\n",
    "print(s4.iloc[0:3])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T05:39:42.423996Z",
     "start_time": "2025-01-17T05:39:42.420129Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[3 3 1 4 0]\n",
      "1    10\n",
      "0    11\n",
      "0    12\n",
      "0    13\n",
      "4    14\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "0    11\n",
      "0    12\n",
      "0    13\n",
      "1    10\n",
      "4    14\n",
      "dtype: int64\n",
      "1    10\n",
      "0    11\n",
      "0    12\n",
      "0    13\n",
      "4    14\n",
      "dtype: int64\n",
      "1    10\n",
      "0    11\n",
      "0    12\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 13
  },
  {
   "cell_type": "code",
   "source": "# print(s4.loc[0:1]) # loc索引值唯一时可以切片",
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T05:42:45.388180Z",
     "start_time": "2025-01-17T05:42:45.384911Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Series([], dtype: int64)\n"
     ]
    }
   ],
   "execution_count": 22
  },
  {
   "cell_type": "code",
   "source": [
    "# DataFrame\n",
    "df4 = pd.DataFrame(np.random.randn(5, 5),\n",
    "                   index=np.random.randint(5, size=5),\n",
    "                   columns=np.random.randint(5, size=5))\n",
    "print(df4)\n",
    "# 轴0是行索引排序\n",
    "df4_isort = df4.sort_index(axis=0, ascending=False)\n",
    "print(df4_isort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T05:44:37.249074Z",
     "start_time": "2025-01-17T05:44:37.244371Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         3         1         3         3\n",
      "2  0.586648 -1.367062 -0.266990 -1.291943  2.156599\n",
      "0  0.641021  0.737206  1.370623 -0.269810  0.672908\n",
      "1 -1.783584  0.383366  0.708971 -0.444527  0.598627\n",
      "2 -0.296591 -0.049288 -0.361653  0.093839 -1.061425\n",
      "2  0.268159  0.397557  0.348851  1.073938  0.214592\n",
      "          0         3         1         3         3\n",
      "2  0.586648 -1.367062 -0.266990 -1.291943  2.156599\n",
      "2  0.268159  0.397557  0.348851  1.073938  0.214592\n",
      "2 -0.296591 -0.049288 -0.361653  0.093839 -1.061425\n",
      "1 -1.783584  0.383366  0.708971 -0.444527  0.598627\n",
      "0  0.641021  0.737206  1.370623 -0.269810  0.672908\n"
     ]
    }
   ],
   "execution_count": 23
  },
  {
   "cell_type": "code",
   "source": [
    "# 轴1是列索引排序\n",
    "df4_isort = df4.sort_index(axis=1, ascending=True)\n",
    "print(df4_isort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-17T05:45:47.281664Z",
     "start_time": "2025-01-17T05:45:47.277699Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         3         3         3\n",
      "2  0.586648 -0.266990 -1.367062 -1.291943  2.156599\n",
      "0  0.641021  1.370623  0.737206 -0.269810  0.672908\n",
      "1 -1.783584  0.708971  0.383366 -0.444527  0.598627\n",
      "2 -0.296591 -0.361653 -0.049288  0.093839 -1.061425\n",
      "2  0.268159  0.348851  0.397557  1.073938  0.214592\n"
     ]
    }
   ],
   "execution_count": 24
  },
  {
   "cell_type": "markdown",
   "source": "###### 5.按值排序（机器学习，深度学习不重要，数据分析才需要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# 按值排序,by后是column的值\n",
    "import random\n",
    "l=[random.randint(0,100) for i in range(24)] #生成24个随机数\n",
    "df4 = pd.DataFrame(np.array(l).reshape(6,4)) #生成6行4列的dataframe\n",
    "print(df4)\n",
    "print('-'*50)\n",
    "#按轴0排序，by后是列名,交换的是行\n",
    "df4_vsort = df4.sort_values(by=3,axis=0, ascending=False) #寻找的是columns里的3,重要\n",
    "print(df4_vsort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T06:25:41.788128Z",
     "start_time": "2025-01-17T06:25:41.782803Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    0   1   2   3\n",
      "0  99  58  16   4\n",
      "1  96  79   5  94\n",
      "2   4  21  82  26\n",
      "3  64  18  42   0\n",
      "4  48  38  96  24\n",
      "5  14  37  62  21\n",
      "--------------------------------------------------\n",
      "    0   1   2   3\n",
      "1  96  79   5  94\n",
      "2   4  21  82  26\n",
      "4  48  38  96  24\n",
      "5  14  37  62  21\n",
      "0  99  58  16   4\n",
      "3  64  18  42   0\n"
     ]
    }
   ],
   "execution_count": 25
  },
  {
   "cell_type": "code",
   "source": [
    "#按轴1排序，by后行索引名，交换的是列\n",
    "df4_vsort = df4.sort_values(by=3,axis=1, ascending=False) #寻找的是index里的3\n",
    "print(df4_vsort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-17T06:28:06.973291Z",
     "start_time": "2025-01-17T06:28:06.969526Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    0   2   1   3\n",
      "0  99  16  58   4\n",
      "1  96   5  79  94\n",
      "2   4  82  21  26\n",
      "3  64  42  18   0\n",
      "4  48  96  38  24\n",
      "5  14  62  37  21\n"
     ]
    }
   ],
   "execution_count": 26
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "###### 6.处理缺失数据（重要）\n",
    "- 1.判断是否存在缺失值：isnull()\n",
    "- 2.丢弃缺失数据：dropna()\n",
    "- 3.填充缺失数据：fillna()"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "df_data = pd.DataFrame([np.random.randn(3), [1., 2., np.nan],\n",
    "                       [np.nan, 4., np.nan], [1., 2., 3.]])\n",
    "print(df_data.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:12.553300Z",
     "start_time": "2025-01-17T06:40:12.549240Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0  0.874984  0.473648 -1.730277\n",
      "1  1.000000  2.000000       NaN\n",
      "2       NaN  4.000000       NaN\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "execution_count": 45
  },
  {
   "cell_type": "code",
   "source": "df_data.iloc[2,0]",
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:12.975583Z",
     "start_time": "2025-01-17T06:40:12.973196Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(nan)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 46
  },
  {
   "cell_type": "code",
   "source": [
    "# 1.isnull来判断是否有缺失值\n",
    "print(df_data.isnull())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:13.321677Z",
     "start_time": "2025-01-17T06:40:13.318216Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2\n",
      "0  False  False  False\n",
      "1  False  False   True\n",
      "2   True  False   True\n",
      "3  False  False  False\n"
     ]
    }
   ],
   "execution_count": 47
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:13.609333Z",
     "start_time": "2025-01-17T06:40:13.606680Z"
    }
   },
   "cell_type": "code",
   "source": [
    "#帮我计算df_data缺失率(按列统计)\n",
    "print(df_data.isnull().sum()/len(df_data)) "
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.25\n",
      "1    0.00\n",
      "2    0.50\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 48
  },
  {
   "cell_type": "markdown",
   "source": "###### 删除缺失数据",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "#默认一个样本，任何一个特征缺失，就删除\n",
    "#inplace True是修改的是原有的df\n",
    "#subset=[0]是指按第一列来删除,第一列有空值就删除对应的行\n",
    "print(df_data.dropna(subset=[0]))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:14.234544Z",
     "start_time": "2025-01-17T06:40:14.229948Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0  0.874984  0.473648 -1.730277\n",
      "1  1.000000  2.000000       NaN\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "execution_count": 49
  },
  {
   "cell_type": "code",
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:14.606533Z",
     "start_time": "2025-01-17T06:40:14.602617Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0  0.874984  0.473648 -1.730277\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.874984</td>\n",
       "      <td>0.473648</td>\n",
       "      <td>-1.730277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 50
  },
  {
   "cell_type": "code",
   "source": [
    "# 用的不多，用在某个特征缺失太多时，才会进行删除\n",
    "print(df_data.dropna(axis=1))  #某列有nan就删除该列"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:14.893140Z",
     "start_time": "2025-01-17T06:40:14.890068Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          1\n",
      "0  0.473648\n",
      "1  2.000000\n",
      "2  4.000000\n",
      "3  2.000000\n"
     ]
    }
   ],
   "execution_count": 51
  },
  {
   "cell_type": "markdown",
   "source": "###### 填充缺失数据",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:16.390957Z",
     "start_time": "2025-01-17T06:40:16.386697Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 均值，中位数，众数填充\n",
    "df_data"
   ],
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0  0.874984  0.473648 -1.730277\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.874984</td>\n",
       "      <td>0.473648</td>\n",
       "      <td>-1.730277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 52
  },
  {
   "cell_type": "code",
   "source": [
    "# 给零列的空值填为-100，按特征（按列）去填充\n",
    "print(df_data.iloc[:,0].fillna(-100.))\n",
    "df_data  # 没变"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:17.138085Z",
     "start_time": "2025-01-17T06:40:17.133476Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0      0.874984\n",
      "1      1.000000\n",
      "2   -100.000000\n",
      "3      1.000000\n",
      "Name: 0, dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0  0.874984  0.473648 -1.730277\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.874984</td>\n",
       "      <td>0.473648</td>\n",
       "      <td>-1.730277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 53
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-17T06:38:49.574795Z",
     "start_time": "2025-01-17T06:38:49.573011Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 要被删除的方法，别用了\n",
    "# df_data.iloc[:,0].fillna(-100.,inplace=True)  # inplace=True后面会被删除\n",
    "# df_data  # 改变"
   ],
   "outputs": [],
   "execution_count": 41
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:21.105085Z",
     "start_time": "2025-01-17T06:40:21.101210Z"
    }
   },
   "cell_type": "code",
   "source": [
    "df_data.iloc[:,2]=df_data.iloc[:,2].fillna(df_data.iloc[:,2].mean()) #用均值填充空值\n",
    "df_data"
   ],
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0  0.874984  0.473648 -1.730277\n",
       "1  1.000000  2.000000  0.634861\n",
       "2       NaN  4.000000  0.634861\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.874984</td>\n",
       "      <td>0.473648</td>\n",
       "      <td>-1.730277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.634861</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.634861</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 54
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-17T06:40:45.010297Z",
     "start_time": "2025-01-17T06:40:45.006493Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 依次拿到每一列\n",
    "for i in df_data.columns:\n",
    "    print(df_data.loc[:,i])"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.874984\n",
      "1    1.000000\n",
      "2         NaN\n",
      "3    1.000000\n",
      "Name: 0, dtype: float64\n",
      "0    0.473648\n",
      "1    2.000000\n",
      "2    4.000000\n",
      "3    2.000000\n",
      "Name: 1, dtype: float64\n",
      "0   -1.730277\n",
      "1    0.634861\n",
      "2    0.634861\n",
      "3    3.000000\n",
      "Name: 2, dtype: float64\n"
     ]
    }
   ],
   "execution_count": 55
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": ""
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
